###################################################################################################
####################################       read in data      ######################################
###################################################################################################
# Colour palette (14 hex colours) shared by the plots in this script.
cccol <- c(
  "#CE0013", "#16557A", "#C7A609", "#87C232", "#64C0AB", "#A14C94", "#15A08C",
  "#8B7E75", "#1E7CAF", "#EA425F", "#46489A", "#E50033", "#0F231F", "#1187CD"
)
library(amap)
############ 2nd naive gene expression
# Load the naive-reprogramming expression table, average replicates per time
# point on the linear scale, convert back to log2(x + 1), and keep only the
# differentially expressed genes (DEGs) listed in the Gfold output.
logfpkm2nd <- read.table(
  "../2nd.reprogramming.lg2.all.fpkm.txt",
  header = TRUE,
  row.names = 1
)
n_path <- c("hiF_r1","hiF_r2","he0_r1","he0_r2","he2_r1","he2_r2","he6_r1","he6_r2","n8_r1","n8_r2","n8_r3","n12_r1","n12_r2","n14_r1","n14_r2","n14_r3","n20_r1","n20_r2","n20_r3","n24p_r1","n24p_r2","n24m_r1","n24m_r2","niPS_r1","niPS_r2")
nData_tmp <- logfpkm2nd[, n_path]
# Invert the log transform (the input presumably holds log2(FPKM + 1) values)
# so that replicate averaging happens on the linear scale.
nfpkm2nd <- 2^nData_tmp - 1
n_time_point <- c("hiF","he0","he2","he6","n8","n12","n14","n20","n24pdox","n24mdox","niPS")
n_label <- c("hiF-T","0d","2d","6d","8d","12d","14d","20d","24d+dox","24d-dox","niPSC-T")
# Derive the replicate groups from the sample names (strip the "_rN" suffix)
# instead of hard-coding column index ranges; groups keep the order in which
# they first appear in n_path.
replicate_group <- sub("_r[0-9]+$", "", n_path)
replicate_cols <- split(
  seq_along(n_path),
  factor(replicate_group, levels = unique(replicate_group))
)
# One replicate group is expected per time-point label.
stopifnot(length(replicate_cols) == length(n_time_point))
nData2ndfpkm <- do.call(
  cbind,
  lapply(
    replicate_cols,
    function(cols) apply(nfpkm2nd[, cols, drop = FALSE], 1, mean)
  )
)
colnames(nData2ndfpkm) <- n_time_point
rownames(nData2ndfpkm) <- rownames(nfpkm2nd)
nData <- log2(nData2ndfpkm[,] + 1)
# Restrict to DEGs (first column of the Gfold file) that are present in the
# expression matrix.
n_deg <- read.table("../../Fig2/Gfold/cutoff.0.58/naive.2nd.deg")[, 1]
n_deg <- intersect(n_deg, rownames(nData))
nData <- nData[n_deg, ]
############ 2nd primed RNAseq
# Primed-reprogramming FPKM table (first column used as row names),
# transformed to log2(FPKM + 1).
pData2ndfpkm <- read.table(
  "../paper.primed.fpkm.txt",
  header = TRUE,
  row.names = 1
)
pData <- log2(pData2ndfpkm + 1)
# Time-point labels listed as common to both series.
common_time_point <- c(
  "hiF-T", "2d", "6d", "8d", "14d", "20d", "24d+dox", "24d-dox", "iPSC-T"
)
############ 2nd naive DNA methylation ratio on promoter
# Promoter methylation ratios, one row per gene (first column used as row
# names).
methyratio_promoter <- read.table(
  "../2nd_average_methratio_on_promoter.txt",
  header = TRUE,
  row.names = 1
)
# NOTE: n_path is reassigned here. From this point on it holds the naive
# methylation sample names, no longer the expression replicate columns.
n_path <- c("hiFT", "he6", "n12", "n20", "n24", "niPS")
p_path <- c("hiFT", "he6", "p20", "p24", "piPS")

###################################################################################################
####################################       correlation       ######################################
###################################################################################################
library(amap)
# Cluster the DEG expression profiles into k groups; the cluster assignment
# in `km` is used further down for the per-cluster enrichment test.
k <- 14
# Fixed seed so that the clustering below is reproducible.
set.seed(4)

# NOTE(review): this result is overwritten by the Kmeans() call below and is
# never read in between. The call is presumably still significant because it
# runs after set.seed() and likely consumes random numbers (random initial
# centres), so deleting or moving it would probably change the final
# clustering -- confirm before cleaning up.
km <- kmeans(nData,k)
# Rebuild nData from the averaged FPKM matrix and restrict it to the DEGs
# again; this repeats the derivation already done when the data was read in.
nData <- log2(nData2ndfpkm[,]+1)
n_deg <- intersect(n_deg,rownames(nData))
nData <- nData[n_deg,]

# Final clustering with method = "correlation" (Kmeans with a capital K;
# presumably the amap implementation loaded via library(amap)).
km <- Kmeans(nData,k,method = "correlation")

# naive
# For every DEG that also has a promoter methylation row, correlate the
# methylation ratio with expression across six matched time points.
# Produces:
#   n_gene       - genes that were tested
#   n_cor        - correlation estimate per tested gene (named by gene)
#   n_cor_pvalue - cor.test p-value per tested gene (named by gene)
common_genes <- intersect(rownames(methyratio_promoter), n_deg)
length(common_genes)
n_len <- length(n_path)
# Expression columns paired, in order, with the methylation values
# hiFT, he6, mean(n12, n12_r2), n20, n24, niPS.
n_expr_cols <- c("hiF","he6","n12","n20","n24pdox","niPS")
# Preallocate one slot per candidate gene instead of growing the result
# vectors inside the loop; untested genes are dropped afterwards.
gene_count <- length(common_genes)
cor_estimate <- rep(NA_real_, gene_count)
cor_pvalue <- rep(NA_real_, gene_count)
was_tested <- logical(gene_count)
for (gene_idx in seq_along(common_genes)) {
  each <- common_genes[gene_idx]
  # Build the methylation series once per gene; the two n12 measurements
  # are averaged into a single value.
  meth_values <- c(
    as.numeric(methyratio_promoter[each, c("hiFT","he6")]),
    as.numeric(apply(methyratio_promoter[each, c("n12","n12_r2")], 1, mean)),
    as.numeric(methyratio_promoter[each, c("n20","n24","niPS")])
  )
  # Only test genes with fewer than n_len - 2 missing methylation values.
  if (sum(is.na(meth_values)) < n_len - 2) {
    cor_test <- cor.test(meth_values, nData[each, n_expr_cols])
    cor_estimate[gene_idx] <- cor_test$estimate
    cor_pvalue[gene_idx] <- cor_test$p.value
    was_tested[gene_idx] <- TRUE
  }
}
n_gene <- common_genes[was_tested]
n_cor <- cor_estimate[was_tested]
n_cor_pvalue <- cor_pvalue[was_tested]
names(n_cor) <- n_gene
names(n_cor_pvalue) <- n_gene

################### p-value cutoff
# pvalue_cutoff <- 0.01
# nm_gene <- names(which(na.omit(n_cor_pvalue) < pvalue_cutoff)) 
# nm_negative_gene <- names(which((na.omit(n_cor_pvalue) < pvalue_cutoff)&na.omit(n_cor)<0))
# nm_positive_gene <- names(which((na.omit(n_cor_pvalue) < pvalue_cutoff)&na.omit(n_cor)>0))
# write.table(cbind(nm_negative_gene),file="naiveMethylationNegativeGenes.txt",col.names=F,row.names=F,quote=F)
# write.table(cbind(nm_positive_gene),file="naiveMethylationPositiveGenes.txt",col.names=F,row.names=F,quote=F)


################### correlation cutoff
# Genes whose methylation/expression correlation is below the cutoff are
# treated as negatively regulated; every other gene with a non-missing
# p-value goes into other_gene.
naive_cutoff <- -0.85
below_cutoff <- na.omit(n_cor < naive_cutoff)
nm_gene <- names(which(below_cutoff))
nm_negative_gene <- nm_gene
other_gene <- setdiff(names(na.omit(n_cor_pvalue)), nm_gene)
# One gene name per line, no header, no quoting.
write.table(
  cbind(nm_negative_gene),
  file = "naiveMethylationNegativeGenes.txt",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)

# Per-cluster enrichment of the negatively correlated genes.
# For each of the k clusters this records:
#   number        - negatively correlated genes found in the cluster
#   pval          - hypergeometric upper-tail p-value P(X >= number)
#   ratio         - number / all negatively correlated genes
#   cluster_ratio - number / cluster size
# and writes the overlapping genes to one text file per cluster.
nm_cluster_gene <- intersect(nm_negative_gene, n_deg)
total_number <- nrow(nData)
# Loop-invariant: size of the negatively correlated gene set.
a_number <- length(nm_cluster_gene)
pval <- numeric(k)
ratio <- numeric(k)
cluster_ratio <- numeric(k)
number <- integer(k)
for (each_cluster in seq_len(k)) {
  GroupB <- names(which(km$cluster == each_cluster))
  cluster_hits <- intersect(nm_cluster_gene, GroupB)
  common_number <- length(cluster_hits)
  b_number <- length(GroupB)
  number[each_cluster] <- common_number
  # Ask phyper for the upper tail directly: P(X > common_number - 1) is
  # P(X >= common_number), and this avoids the precision loss of
  # 1 - phyper(...) when the p-value is very small.
  pval[each_cluster] <- phyper(
    common_number - 1, a_number, total_number - a_number, b_number,
    lower.tail = FALSE
  )
  # print(c(common_number-1, a_number, total_number-a_number, b_number))
  ratio[each_cluster] <- common_number / a_number
  cluster_ratio[each_cluster] <- common_number / b_number
  write.table(
    cbind(cluster_hits),
    file = paste0("naiveMethylatedNegativeGeneCluster", each_cluster, ".txt"),
    col.names = FALSE,
    row.names = FALSE,
    quote = FALSE
  )
}
# Bar plot of -log10(enrichment p-value) per cluster, written to PDF.
# Bars 1 and 11 are drawn in the first palette colour, the rest in black.
bar_colors <- rep("black", k)
bar_colors[c(1, 11)] <- cccol[1]
# The 10e-9 offset (i.e. 1e-8) keeps -log10 finite when a p-value is 0.
neg_log_pval <- -log10(pval + 10e-9)
pdf("naive.methylated.negative.gene.pvalue.pdf", width = 7, height = 4)
barplot(
  neg_log_pval,
  names.arg = seq(k),
  col = bar_colors,
  border = NA,
  ylab = "-log10(p-value)"
)
# Dashed reference line at -log10(p) = 2, i.e. p = 0.01.
abline(h = 2, lty = 2)
dev.off()

# Density scatter of correlation estimate against -log10(p-value) for all
# tested genes, written to PDF. Genes below the correlation cutoff (nm_gene)
# are overplotted in the first palette colour, all others in the default
# colour.
cor_complete <- na.omit(n_cor)
neg_log_p <- -log10(na.omit(n_cor_pvalue))
pdf("naive.methylation.expression.correlation.pdf", width = 5, height = 4)
# Earlier plain-scatter variant, kept for reference:
# plot(na.omit(n_cor)[other_gene],-log10(na.omit(n_cor_pvalue)[other_gene]),pch='.',xlab="correlation",ylab="-log10(p-value)",ylim=c(0,5))
# points(na.omit(n_cor)[nm_gene],-log10(na.omit(n_cor_pvalue)[nm_gene]),pch='.',col=cccol[1])
smoothScatter(
  cor_complete,
  neg_log_p,
  pch = '.',
  xlab = "correlation",
  ylab = "-log10(p-value)",
  ylim = c(0, 5)
)
points(cor_complete[other_gene], neg_log_p[other_gene], pch = '.')
points(cor_complete[nm_gene], neg_log_p[nm_gene], pch = '.', col = cccol[1])
# abline(h=-log10(pvalue_cutoff),lty=2,col=cccol[1])
# Dashed vertical line at zero correlation.
abline(v = 0, lty = 2)
# plot(na.omit(n_cor),na.omit(n_cor_pvalue),pch='.',xlab="correlation",ylab="p-value")
dev.off()
